Hi! Welcome to where my work is :) You can scroll through chronologically, or skip to particular sections you are interested in from the navigation bar on the left - please give it a while to load then mouse-over it.
Set working directory first
Loading required libraries
# Load the spatial-analysis stack used throughout this document.
# NOTE(review): rgdal, rgeos, maptools and GISTools were retired from CRAN in
# 2023 — consider migrating this workflow to sf/terra. Also note library(sf)
# is loaded twice (here and below); the second call is harmless but redundant.
library(sf)
library(rgdal)
library(raster)
library(mapview)
library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
library(tmap)
library(sf)
library(geojson)
library(geojsonio)
library(tmaptools)
Opening shapefile, taken from ukdataservice.ac.uk, giving a map of Greater Manchester by 2011 English Census Merged Wards (hence data for basemap shapefile is by ward level).
# Read the 2011 Census merged-ward boundaries for Greater Manchester.
# stringsAsFactors = F keeps the four attribute columns as character vectors.
require(rgdal)
require(ggplot2)
shp <- readOGR(dsn ="BoundaryData", layer="england_cmwd_2011", stringsAsFactors = F)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Tommy\Desktop\GIS_proj\Github\CASA0005GISProj\BoundaryData", layer: "england_cmwd_2011"
## with 215 features
## It has 4 fields
# Quick visual sanity check of the 215 ward polygons
plot(shp)

#checking Coordinate Reference System of the shapefile, which was originally NA
# Inspect the attribute table and CRS: the shapefile is in British National
# Grid (transverse Mercator, OSGB36, units = metres), as the proj4string shows.
summary(shp)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x 351662.6 406087.2
## y 381165.4 421037.7
## Is projected: TRUE
## proj4string :
## [+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy
## +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894]
## Data attributes:
## label altname name
## Length:215 Length:215 Length:215
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## code
## Length:215
## Class :character
## Mode :character
crs(shp)
## CRS arguments:
## +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy
## +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894
Specifying CRS of the greater manchester area shapefile as EPSG 4326 for latitude/longitude
# Reproject the wards from British National Grid to WGS84 lon/lat (EPSG:4326)
# so they align with the lon/lat point datasets loaded later.
Manchester <- spTransform(shp, CRS("+init=epsg:4326"))
# Confirm the transform took effect
st_crs(Manchester)
## Coordinate Reference System:
## EPSG: 4326
## proj4string: "+proj=longlat +datum=WGS84 +no_defs"
Plot Manchester basemap
plot(Manchester)

Extract just the city of Manchester itself rather than greater Manchester, as the area would otherwise be too big for analysis and Ripley’s K.
#extract the city
# Keep only the 32 named wards that make up the City of Manchester proper
# (rather than all of Greater Manchester).
cityManchester <- subset(Manchester, name %in% c("Didsbury West","Fallowfield","Gorton North","Gorton South","Harpurhey","Higher Blackley","Hulme","Levenshulme","Longsight","Miles Platting and Newton Heath","Moss Side","Moston","Northenden","Old Moat","Rusholme","Sharston","Whalley Range","Withington","Woodhouse Park","Ancoats and Clayton","Ardwick","Baguley","Bradford","Brooklands","Burnage","Charlestown","Cheetham","Chorlton","Chorlton Park","City Centre","Crumpsall","Didsbury East"))
#Check to see that the correct borough has been pulled out
tm_shape(cityManchester) +
tm_polygons(col = NA, alpha = 0.5)

Read in police station data for greater manchester area, from the geoJSON file obtained using overpass-turbo.eu API and running a query for police stations in the Greater Manchester Area from OpenStreetMap.
Keeping only the points, as readOGR cannot read both points and polygons at the same time, and the points represent police stations.
# Read police-station locations from the OpenStreetMap/overpass-turbo GeoJSON.
# require_geomType="wkbPoint" keeps only point features (51 of the 114 mixed
# point/polygon features) — readOGR cannot read both geometry types at once.
pol <- rgdal::readOGR("C:/Users/Tommy/Desktop/GIS_proj/BoundaryData/polstn.geojson",require_geomType="wkbPoint")
## OGR data source with driver: GeoJSON
## Source: "C:\Users\Tommy\Desktop\GIS_proj\BoundaryData\polstn.geojson", layer: "polstn"
## with 114 features;
## Selected wkbPoint feature type, with 51 rows
## It has 42 fields
Plotting the police station points
plot(pol)

# Inspect the 42 OSM attribute fields of the 51 station points (output below)
summary(pol)
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## coords.x1 -2.89431 -1.741055
## coords.x2 53.25742 53.778362
## Is projected: FALSE
## proj4string :
## [+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0]
## Number of points: 51
## Data attributes:
## id X.id amenity building
## node/1123287516: 1 node/1123287516: 1 police:51 office: 0
## node/1131472551: 1 node/1131472551: 1 yes : 0
## node/1290032691: 1 node/1290032691: 1 NA's :51
## node/1429962455: 1 node/1429962455: 1
## node/1865698140: 1 node/1865698140: 1
## node/233211757 : 1 node/233211757 : 1
## (Other) :45 (Other) :45
## type addr.postcode addr.street
## multipolygon: 0 OL7 0BQ : 1 Grappenhall Road: 1
## NA's :51 PR25 2EX: 1 Lancastergate : 1
## WA4 2AF : 1 Manchester Road : 1
## WA6 7NW : 1 Ship Street : 1
## BL1 8UN : 0 Spendmore Lane : 1
## (Other) : 0 (Other) : 1
## NA's :47 NA's :45
## contact.phone contact.website
## +44 161 872 5050: 0 http://www.gmp.police.uk: 0
## NA's :51 NA's :51
##
##
##
##
##
## name opening_hours
## 'B' Divisional Headquarters : 1 Mo-Sa 10:00-18:00: 0
## Adlington : 1 NA's :51
## Altrincham Police Station : 1
## Ashton under lyne Police Station: 1
## Bury Police Station : 1
## (Other) :19
## NA's :27
## note building.colour building.levels building.material
## Layout appoximate: 0 wheat: 0 2 : 0 brick: 0
## NA's :51 NA's :51 NA's:51 NA's :51
##
##
##
##
##
## height roof.colour roof.material roof.shape
## 4 : 0 grey: 0 gravel: 0 flat : 0
## NA's:51 NA's:51 NA's :51 round: 0
## NA's :51
##
##
##
##
## source addr.city
## OS_OpenData_StreetView: 2 Frodsham, Cheshire : 1
## OS OpenData StreetView: 1 Huddersfield : 1
## survey : 1 Leyland : 1
## Bing : 0 Manchester : 1
## Bing aerial image : 0 Stockton Heath, Warrington: 1
## (Other) : 0 (Other) : 0
## NA's :47 NA's :46
## phone fhrs.id operator
## +44 161 856 5629: 0 335284: 0 Cheshire Constabulary : 0
## +44 161 872 5050: 1 NA's :51 Cheshire Police : 2
## 0845 458 6392 : 0 Greater Manchester Police: 1
## NA's :50 Lancashire Constabulary : 1
## NA's :47
##
##
## wheelchair addr.housename source.building
## yes : 1 Atherton Police Station : 0 Bing: 0
## NA's:50 Cheshire Constabulary - Police Station: 1 NA's:51
## Police Headquarters : 0
## Stockton Heath Police Station : 1
## NA's :49
##
##
## source.geometry
## Bing: 0
## NA's:51
##
##
##
##
##
## designation
## Stockport Divisional Headquarters Greater Manchester Police: 0
## NA's :51
##
##
##
##
##
## roof.levels addr.housenumber
## 1 : 0 37 : 0
## NA's:51 NA's:51
##
##
##
##
##
## website
## http://www.gmp.police.uk : 0
## https://www.cheshire.police.uk/contact-us/police-stations-and-custody/northwich-police-station.aspx: 0
## NA's :51
##
##
##
##
## created_by is_in listed_status
## Potlatch 0.10b: 1 Fulwood: 1 Grade II: 2
## Potlatch 0.6a : 1 NA's :50 NA's :49
## NA's :49
##
##
##
##
## wikimedia_commons wheelchair.description
## File:Police Station, Warrington.jpg: 1 No Longer in use: 1
## NA's :50 NA's :50
##
##
##
##
##
## entrance
## yes : 1
## NA's:50
##
##
##
##
##
## description
## Opening Hours: Monday: 8am to 11pm Tuesday: 8am to 11pm Wednesday: 8am to 11pm Thursday: 8am to 11pm Friday: 8am to 11pm Saturday: 8am to 11pm Sunday: 8am to 11pm Bank Holidays: 8am to 11pm: 1
## NA's :50
##
##
##
##
##
## ele level HE_ref
## Street Level: 1 0 : 1 1240198: 1
## NA's :50 NA's:50 NA's :50
##
##
##
##
##
Checking the CRS of the police station points - it is already EPSG4326!
# The GeoJSON points already come in WGS84 lon/lat, so no reprojection needed
st_crs(pol)
## Coordinate Reference System:
## EPSG: 4326
## proj4string: "+proj=longlat +datum=WGS84 +no_defs"
Removing duplicates from police station points
# remove.duplicates (sp) drops points with identical coordinates
polstns <- remove.duplicates(pol)
Plotting the police station points on top of the greater manchester area map using mapview
mapview::mapview(Manchester) + mapview::mapview(polstns, color = "white", col.regions = "black",legend=FALSE)
The basemap of Manchester is a SpatialPolygonsDataFrame; the police station points are a SpatialPointsDataFrame.
# Confirm both layers are sp-class objects before subsetting one by the other
class(cityManchester)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(polstns)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
Plot the police stations in the Greater Manchester area using tmap
# Interactive maps of the stations over Greater Manchester, then the city only
tmap_mode("view")
tm_shape(Manchester) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polstns) +
tm_dots(col = "blue")
Plot the police stations in the Manchester city area using tmap
tmap_mode("view")
tm_shape(cityManchester) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polstns) +
tm_dots(col = "blue")
Filtering for police stations which are within the Manchester city boundary
# Relabel both layers' CRS strings so sp treats them as identical.
# NOTE(review): proj4string<- only relabels, it does NOT reproject — both
# layers are already EPSG:4326 here, so this is safe.
proj4string(cityManchester) <- CRS("+init=epsg:4326")
proj4string(polstns) <- CRS("+init=epsg:4326")
# sp's [polygons,] subsetting keeps only the points inside the city boundary
polsub <- polstns[cityManchester,]
#checking to see that they've been removed
tmap_mode("view")
tm_shape(cityManchester) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
Reading in the dataset for population density
# Population density per ward (32 rows, one per City of Manchester ward)
library(readr)
popdens <- read_csv("popdens.csv")
summary(popdens)
## geography popdens
## Length:32 Min. : 12.00
## Class :character 1st Qu.: 37.40
## Mode :character Median : 47.95
## Mean : 52.83
## 3rd Qu.: 66.53
## Max. :116.70
Joining first dataset of population density with the Manchester basemap
# Attribute join: attach the population-density table to the city polygons.
# sp's merge() keeps the geometry and matches rows by ward name;
# duplicateGeoms = TRUE permits one data row to match several polygons.
# (The interactive help call `?sp::merge` was removed — it opens a help pager
# and has no effect in a non-interactive/knitted run; see ?sp::merge manually.)
require(sp)
manchester_popdens <- merge(cityManchester, popdens, by.x = "name", by.y = "geography", duplicateGeoms = TRUE)
Plotting map to test if join has succeeded
tmap_mode("view")
tm_shape(manchester_popdens) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
Reading in the dataset for deprivation - the average number (out of 4 - employment, education, health and disability and household overcrowding) of dimensions a household was deprived in was used
depr <- read_csv("deprivation.csv")
summary(depr)
## geography deprivation
## Length:32 Min. :0.5989
## Class :character 1st Qu.:0.9484
## Mode :character Median :1.1398
## Mean :1.0764
## 3rd Qu.:1.2323
## Max. :1.4070
Joining second dataset of deprivation with the Manchester basemap
manchester_2 <- merge(manchester_popdens, depr, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)
Plotting map to check if second join has succeeded
tmap_mode("view")
tm_shape(manchester_2) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
Reading in the dataset for SES - the average social grade by ward was taken, by using a numeric scale of AB = 1, C1 = 2, C2 = 3, DE = 4, so lower numbers would mean a higher social grade.
# Social grade per ward: numeric scale AB=1, C1=2, C2=3, DE=4
# (lower value = higher social grade)
ses <- read_csv("SES.csv")
summary(ses)
## geography socialgrade
## Length:32 Min. :1.739
## Class :character 1st Qu.:2.404
## Mode :character Median :2.744
## Mean :2.620
## 3rd Qu.:2.896
## Max. :3.119
Joining third dataset of SES with the Manchester basemap
# Third attribute join: social grade onto the twice-enriched polygons
manchester_merged <- merge(manchester_2, ses, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)
Plotting map to check if third join has succeeded
tmap_mode("view")
tm_shape(manchester_merged) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
manchester_merged is the map of Manchester city with merged data on population density, social grade and SES.
Next step would be to map the crime datapoints onto the Manchester basemap. Crime data for June 2019 for the Greater Manchester area was used. This was taken from https://data.police.uk/data/
# Read June 2019 street-level crime records (data.police.uk) and convert the
# Longitude/Latitude columns into sf point geometry (WGS84).
crime <- read_csv("manchester_crime.csv")
crime_locations <- st_as_sf(crime, coords = c("Longitude","Latitude"), crs = 4326)
class(crime)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
require(sf)
# NOTE(review): crime_sf repeats the same conversion as crime_locations above,
# only with a proj4 string instead of an EPSG code — one of the two is redundant.
crime_sf <- st_as_sf(x = crime,
coords = c("Longitude", "Latitude"),
crs = "+init=epsg:4326")
# simple plot
plot(crime_sf)

# sp-class copy of the crime points, used later for over()/tmap
crime_spdf <- as(crime_sf, "Spatial")
manchester_crime_test<-ggplot(crime, aes(x=Longitude,y=Latitude))+geom_point()+coord_equal()
manchester_crime_test
Plotting a 2D KDE of crime occurrences in Manchester using the longitude and latitude data in the crime dataset
# Binned 2D histogram of crime locations (30x30 bins over lon/lat)
manchester_crime_test<-ggplot(crime, aes(x=Longitude,y=Latitude))+stat_bin2d(bins=30)
manchester_crime_test
Plotting a continuous distribution instead
# NOTE(review): the ..level.. pronoun is superseded by after_stat(level) in
# current ggplot2; kept as-is for compatibility with the version used here.
manchester_crime_test+stat_density2d(aes(fill = ..level..), geom="polygon")

Plotting the crime datapoints to test
# Plot every column of the sf object (one panel per attribute)
plot(crime_locations)

Examining metadata of the crime datapoints
summary(crime_locations)
## geometry
## POINT :8828
## epsg:4326 : 0
## +proj=long...: 0
Checking CRS of crime datapoints
crs(crime_locations)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Checking the data type of the crime locations - sf, tbl_df, tbl, data.frame
class(crime_locations)
## [1] "sf" "tbl_df" "tbl" "data.frame"
Ensuring that crime_locations has EPSG 4326 as CRS
# Make an sp copy of the crime points and pin its CRS to EPSG:4326.
crime_locations_sp <- as(crime_locations, 'Spatial')
crime_locations_sp <- spTransform(crime_locations_sp, CRS("+init=epsg:4326"))
Checking new CRS of crime datapoints and data type, removing duplicates
# NOTE(review): the checks below re-inspect crime_locations (the original sf
# object), not the transformed crime_locations_sp — they do not verify the
# transform above. Harmless here since both are already WGS84.
crs(crime_locations)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
class(crime_locations)
## [1] "sf" "tbl_df" "tbl" "data.frame"
Plotting crime data points on Manchester basemap
mapview::mapview(manchester_merged) + mapview::mapview(crime_locations, color = "white", col.regions = "black",legend=FALSE, title = "Crime occurences in Manchester city")
# Additional plotting/wrangling libraries for the descriptive-statistics section
library(tidyverse)
library(downloader)
library(rgdal)
library(sf)
library(ggplot2)
library(reshape2)
library(plotly)
library(highcharter)
Histograms for each of the scale variables - population densities, deprivation, and SES Starting with popdens
# Histogram with mean line (red) and median line (blue) and density plot
# Distribution of ward-level population density across the 32 wards
ggplot(popdens, aes(x=popdens)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth=10)+
geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(popdens)),
color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(popdens)),
color="blue", linetype="dashed", size=1)

# Descriptive statistics
print(max(popdens$popdens))
## [1] 116.7
print(min(popdens$popdens))
## [1] 12
print(median(popdens$popdens))
## [1] 47.95
print(sd(popdens$popdens))
## [1] 22.6814
print(IQR(popdens$popdens))
## [1] 29.125
print(quantile(popdens$popdens, c(.25, .75)))
## 25% 75%
## 37.400 66.525
Plotting histogram for deprivation next
# Histogram with mean line (red) and median line (blue) and density plot
# Distribution of ward-level deprivation (mean dimensions deprived, 0-4 scale)
ggplot(depr, aes(x=deprivation)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.1)+
geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(deprivation)),
color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(deprivation)),
color="blue", linetype="dashed", size=1)

# Descriptive statistics
print(max(depr$deprivation))
## [1] 1.406953
print(min(depr$deprivation))
## [1] 0.5988858
print(median(depr$deprivation))
## [1] 1.139765
print(sd(depr$deprivation))
## [1] 0.2171107
print(IQR(depr$deprivation))
## [1] 0.283867
print(quantile(depr$deprivation, c(.25, .75)))
## 25% 75%
## 0.9484226 1.2322897
Plotting histogram for social grade next
# Histogram with mean line (red) and median line (blue) and density plot
# Distribution of ward-level mean social grade (1=AB ... 4=DE; lower = higher grade)
ggplot(ses, aes(x=socialgrade)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.1)+
geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(socialgrade)),
color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(socialgrade)),
color="blue", linetype="dashed", size=1)

# Descriptive statistics
print(max(ses$socialgrade))
## [1] 3.118662
print(min(ses$socialgrade))
## [1] 1.738781
print(median(ses$socialgrade))
## [1] 2.744045
print(sd(ses$socialgrade))
## [1] 0.3916912
print(IQR(ses$socialgrade))
## [1] 0.4920259
print(quantile(ses$socialgrade, c(.25, .75)))
## 25% 75%
## 2.403788 2.895813
Plotting histogram for crimes recorded per ward next
#reading in data for crimes recorded per ward (taken from later part of the code after performing count)
library(readr)
crime_ward <- read_csv("crime_ward.csv")
summary(crime_ward)
## geography crimes
## Length:32 Min. : 96.0
## Class :character 1st Qu.: 145.2
## Mode :character Median : 183.0
## Mean : 275.7
## 3rd Qu.: 330.8
## Max. :1887.0
# Crime counts are heavily right-skewed (City Centre = 1887), so log-transform.
# log1p computes log(1 + x), safe even if a count were zero.
crime_ward$lnCrime <- log1p(crime_ward$crimes)
# Histogram with mean line (red) and median line (blue) and density plot
ggplot(crime_ward, aes(x=lnCrime)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.05)+
geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(lnCrime)),
color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(lnCrime)),
color="blue", linetype="dashed", size=1)

# Descriptive statistics
print(max(crime_ward$crimes))
## [1] 1887
print(min(crime_ward$crimes))
## [1] 96
print(median(crime_ward$crimes))
## [1] 183
print(sd(crime_ward$crimes))
## [1] 313.0229
print(IQR(crime_ward$crimes))
## [1] 185.5
print(quantile(crime_ward$crimes, c(.25, .75)))
## 25% 75%
## 145.25 330.75
Converting both cityManchester and crime_locations to data.frames to run the KDE, since it requires numeric input
# Drop the spatial structure, keeping attribute tables only, for the KDE step
df_cityManchester <- as.data.frame(cityManchester)
df_crime_locations <- as.data.frame(crime_locations)
Create two separate fields for latitude and longitude from the manchester spatialpolygonsdataframe
# Flatten every polygon ring in cityManchester into one two-column data frame
# (V1 = longitude, V2 = latitude), for use as a ggplot basemap layer.
# BUG FIX: the original loop overwrote `coords` on each iteration, so
# coords_df ended up containing only the LAST ring of the LAST ward; here all
# rings are accumulated and bound together.
polys <- attr(cityManchester, 'polygons')
ring_coords <- list()
for (i in seq_along(polys)) {
  # each 'polygons' slot holds one or more 'Polygons' rings
  rings <- attr(polys[[i]], 'Polygons')
  for (j in seq_along(rings)) {
    ring_coords[[length(ring_coords) + 1L]] <- coordinates(rings[[j]])
  }
}
coords_df <- as.data.frame(do.call(rbind, ring_coords))
Plotting out crime occurrences in Manchester city
# Grey ward outline (from coords_df) with crime points overlaid in blue
ggplot() + geom_polygon(data = coords_df, aes(x = V1, y = V2), fill = "grey75") +
geom_point(data = crime, aes(x = Longitude, y = Latitude),
col = "dodger blue", alpha = .5, size = 1.5) +
coord_equal() +
ggtitle("Crime occurences in Manchester")

Counting number of crime occurence points per polygon
# Point-in-polygon overlay: assign each crime point to the ward containing it,
# then tabulate counts per ward name.
class(manchester_merged)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(crime_spdf)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
proj4string(manchester_merged)
## [1] "+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# Align the crime points' CRS string with the polygons' before over()
crime_spdf <- spTransform(crime_spdf, CRS("+init=epsg:4326"))
proj4string(crime_spdf)
## [1] "+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
plot(manchester_merged)
plot(crime_spdf, col="red" , add=TRUE)

# over() returns, for each point, the attribute row of the polygon it falls in
res <- over(crime_spdf, manchester_merged)
table(res$name)
##
## Ancoats and Clayton Ardwick
## 484 368
## Baguley Bradford
## 179 333
## Brooklands Burnage
## 136 149
## Charlestown Cheetham
## 176 425
## Chorlton Chorlton Park
## 108 137
## City Centre Crumpsall
## 1887 270
## Didsbury East Didsbury West
## 101 126
## Fallowfield Gorton North
## 151 298
## Gorton South Harpurhey
## 212 443
## Higher Blackley Hulme
## 355 213
## Levenshulme Longsight
## 199 187
## Miles Platting and Newton Heath Moss Side
## 334 219
## Moston Northenden
## 148 171
## Old Moat Rusholme
## 136 172
## Sharston Whalley Range
## 151 129
## Withington Woodhouse Park
## 96 330
After plotting number of crimes per ward on a .csv file, time to merge with the manchester_merged basemap. Reading in the dataset for crimes per ward using code chunk directly above
# crime_ward now also carries the lnCrime column added earlier
summary(crime_ward)
## geography crimes lnCrime
## Length:32 Min. : 96.0 Min. :4.575
## Class :character 1st Qu.: 145.2 1st Qu.:4.985
## Mode :character Median : 183.0 Median :5.215
## Mean : 275.7 Mean :5.380
## 3rd Qu.: 330.8 3rd Qu.:5.804
## Max. :1887.0 Max. :7.543
Joining crimes per ward dataset with the Manchester basemap
# Final attribute join: crime counts (and lnCrime) onto the enriched polygons
manchester_merged_final <- merge(x= manchester_merged, y=crime_ward, by.x = "name", by.y = "geography")
Plotting map to check if join has succeeded.
# Visual check that the final join preserved the geometry
library(tmap)
tmap_mode("view")
tm_shape(manchester_merged_final) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
All data (popdens, social grade, SES, crime) on ward level loaded, locations of 4 police stations in Manchester city also loaded.
Making a choropleth map by crimes
library(ggplot2)
library(RColorBrewer)
library(classInt)
names(manchester_merged_final)
## [1] "name" "label" "altname" "code" "popdens"
## [6] "deprivation" "socialgrade" "crimes" "lnCrime"
#spplot(manchester_merged_final, "crimes")
# quantile breaks
breaks_qt <- classIntervals(manchester_merged_final$crimes, n = 7, style = "quantile")
br <- breaks_qt$brks
# Widen the first/last break slightly so cut() includes the min and max values
# (cut's intervals are open on the left by default).
offs <- 0.0000001
br[1] <- br[1] - offs
br[length(br)] <- br[length(br)] + offs
# categories for choropleth map
manchester_merged_final$crimes_bracket <- cut(manchester_merged_final$crimes, br)
# plot
#my.palette <- brewer.pal(n = 7, name = "OrRd")
#spplot(manchester_merged_final, "crimes_bracket", col.regions=my.palette, main = "Manchester City Recorded Crimes in June 2019 by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
tm_polygons("crimes",
style="quantile",
title="Manchester city \nrecorded crimes \nby ward in June 2019")
# NOTE(review): tmap_mode() placed after the plot only affects later maps
tmap_mode("view")
Making a choropleth map by popdens
# quantile breaks
# Same quantile-bracket choropleth recipe as for crimes, now for popdens
breaks_qt <- classIntervals(manchester_merged_final$popdens, n = 7, style = "quantile")
br <- breaks_qt$brks
offs <- 0.0000001
br[1] <- br[1] - offs
br[length(br)] <- br[length(br)] + offs
# categories for choropleth map
manchester_merged_final$popdens_bracket <- cut(manchester_merged_final$popdens, br)
# plot
#spplot(manchester_merged_final, "popdens_bracket", col.regions=my.palette, main = "Manchester City Population Density by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
tm_polygons("popdens",
style="quantile",
title="Manchester city \nPopulation Density \nby ward")
tmap_mode("view")
Making a choropleth map by deprivation
#spplot(manchester_merged_final, "deprivation")
# Quantile-bracket choropleth for deprivation, same recipe as above
breaks_qt <- classIntervals(manchester_merged_final$deprivation, n = 7, style = "quantile")
br <- breaks_qt$brks
offs <- 0.0000001
br[1] <- br[1] - offs
br[length(br)] <- br[length(br)] + offs
# categories for choropleth map
manchester_merged_final$deprivation_bracket <- cut(manchester_merged_final$deprivation, br)
# plot
#spplot(manchester_merged_final, "deprivation_bracket", col.regions=my.palette, main = "Manchester City deprivation by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
tm_polygons("deprivation",
style="quantile",
title="Manchester city \ndeprivation \nby ward")
tmap_mode("view")
Making a choropleth map by social grade
#spplot(manchester_merged_final, "socialgrade")
# Quantile-bracket choropleth for social grade, same recipe as above
breaks_qt <- classIntervals(manchester_merged_final$socialgrade, n = 7, style = "quantile")
br <- breaks_qt$brks
offs <- 0.0000001
br[1] <- br[1] - offs
br[length(br)] <- br[length(br)] + offs
# categories for choropleth map
manchester_merged_final$socialgrade_bracket <- cut(manchester_merged_final$socialgrade, br)
# plot
#spplot(manchester_merged_final, "socialgrade_bracket", col.regions=my.palette, main = "Manchester City social grade by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
tm_polygons("socialgrade",
style="quantile",
title="Manchester city \nsocial grade \nby ward")
tmap_mode("view")
Next, point pattern analysis will be conducted, to confirm the visual suspicion that there is spatial clustering of crimes within Manchester city. The entire Manchester city was too large to conduct analysis on, and hence this particular study zoomed in on a particular ward - Bradford. Bradford was picked as it has two police stations, so it would make an interesting study to see if the clusters are near the police stations.
We have to create an observation window for spatstat to carry out analysis within – set to the extent of the Bradford ward.
Thus, pull out the Bradford ward.
#extract the Bradford ward
# Bradford is chosen because it contains two police stations
bradford <- manchester_merged_final[manchester_merged_final@data$name=="Bradford",]
#Checking to see if it has been pulled out successfully
tm_shape(bradford) +
tm_polygons(col = NA, alpha = 0.5)
#sf object:
bradfordSF <- st_as_sf(manchester_merged_final)
bradfordSF <- bradfordSF[bradfordSF$name=="Bradford",]
We need to clip the crime occurence datapoints so that we have a subset of just those that fall within the Bradford ward.
#clip the data to our single ward
library(maptools)
# NOTE(review): readShapePoints() is deprecated (maptools retired) — prefer
# rgdal::readOGR or sf::st_read. Reads the crime points previously exported
# to shapefile, stamping the given WGS84 proj4string on them.
crime_respdf <- readShapePoints("crime_exported.shp", proj4string = CRS("+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"), verbose = FALSE, repair=FALSE)
# Police stations falling inside the Bradford ward polygon
polsub_bradford <- polsub[bradford,]
Check that it’s worked - blue dots are crime occurrences and red dots are police stations in Bradford
# Crime points falling inside the Bradford ward polygon
crime_bradford <- crime_respdf[bradford,]
crs(crime_bradford)
## CRS arguments:
## +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
tmap_mode("view")+tm_shape(bradford) +tm_polygons(col = NA, alpha = 0.5) +tm_shape(crime_respdf[bradford,]) + tm_dots(col = "blue") + tm_shape(polsub_bradford) +tm_dots(col = "red")
Create an observation window for spatstat to carry out analysis
library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
library(tmap)
library(sf)
library(geojson)
library(geojsonio)
library(tmaptools)
# Reproject the ward to British National Grid so the spatstat window and all
# distance-based statistics are in metres rather than degrees.
BNG = "+init=epsg:27700"
bradfordBNG <- spTransform(bradford,BNG)
# Observation window bounding the point-pattern analysis to the ward
window <- as.owin(bradfordBNG)
plot(window)

# Also reproject the Bradford crime points to BNG to match the window
crime_bradfordBNG <-spTransform(crime_bradford,BNG)
For point pattern analysis, have to create a point pattern (ppp) object
# Build the spatstat ppp object from the BNG easting/northing coordinates
crime_bradford.ppp <- ppp(x=crime_bradfordBNG@coords[,1],y=crime_bradfordBNG@coords[,2],window=window)
Checking the ppp object
# 333 crime points inside the Bradford window; note the summary reports
# duplicated points, which some spatstat statistics warn about.
plot(crime_bradford.ppp,pch=16,cex=0.5, main="Crime occurrences in Manchester city, Bradford ward, June 2019")

class(crime_bradford.ppp)
## [1] "ppp"
summary(crime_bradford.ppp)
## Planar point pattern: 333 points
## Average intensity 6.449476e-05 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 2 decimal places
## i.e. rounded to the nearest multiple of 0.01 units
##
## Window: polygonal boundary
## single connected closed polygon with 277 vertices
## enclosing rectangle: [385196.5, 390147.4] x [396899.8, 399214.2] units
## Window area = 5163210 square units
## Fraction of frame area: 0.451
plot(crime_bradford)

Plotting KDE for crime occurrences in Bradford ward
# Kernel density estimate over the window (default bandwidth), as an 'im' image
ds <- density(crime_bradford.ppp)
class(ds)
## [1] "im"
plot(ds, main = "Crime density in Bradford ward, Manchester city, June 2019")

Doing the same with contours
K1 <- density(crime_bradford.ppp) # Using the default bandwidth
plot(K1, main=NULL, las=1)
contour(K1, add=TRUE)

Changing bandwidth
K2 <- density(crime_bradford.ppp, sigma=50) # sigma is in map units (metres under BNG): a 50 m bandwidth, not 50 km
plot(K2, main=NULL, las=1)
contour(K2, add=TRUE)

Testing for spatial randomness of crime occurrences in Bradford ward, Manchester city
# Ripley's K with border correction: compares observed clustering against the
# Poisson (CSR) expectation across a range of distances
K <- Kest(crime_bradford.ppp, correction="border")
plot(K)

Theoretical model of CSR, compare actual results to that.
With point data, we specify a real function T(x,y) defined at all locations x,y in our sampling window. We evaluate this function at each of the data points and compare the empirical values of T with the predicted distribution of T under the CSR assumptions.
# Spatial Kolmogorov-Smirnov test of CSR against the x coordinate
ks <- cdf.test(crime_bradford.ppp, "x")
plot(ks)

pval <- ks$p.value
# p << 0.05: reject complete spatial randomness along x
pval
## [1] 1.053552e-09
Density function as a covariate to estimate overall spatial randomness
ds <- density(crime_bradford.ppp)
# Same test, but with the fitted density surface itself as the covariate
k <- cdf.test(crime_bradford.ppp, ds)
plot(k)

The G function measures the distribution of distances from an arbitrary event to its nearest event (i.e. uses nearest neighbor distances). By plotting our empirically estimated G function against our theoretical expectation if the process were CSR, we can assess the extent to which the empirical distribution of our data is different from the theoretical CSR expectation.
# G function: distribution of nearest-neighbour distances vs Poisson theory
gtest <- Gest(crime_bradford.ppp)
gtest
## Function value object (class 'fv')
## for the function r -> G(r)
## .....................................................................
## Math.label Description
## r r distance argument r
## theo G[pois](r) theoretical Poisson G(r)
## han hat(G)[han](r) Hanisch estimate of G(r)
## rs hat(G)[bord](r) border corrected estimate of G(r)
## km hat(G)[km](r) Kaplan-Meier estimate of G(r)
## hazard hat(h)[km](r) Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r) theoretical Poisson hazard function h(r)
## .....................................................................
## Default plot formula: .~r
## where "." stands for 'km', 'rs', 'han', 'theo'
## Recommended range of argument r: [0, 82.406]
## Available range of argument r: [0, 238.37]
plot(gtest)
## 7d. F test for CSR
# F function ("empty space" function): distances from arbitrary locations to
# the nearest event, again compared with the Poisson expectation
ftest <- Fest(crime_bradford.ppp)
ftest
## Function value object (class 'fv')
## for the function r -> F(r)
## .....................................................................
## Math.label Description
## r r distance argument r
## theo F[pois](r) theoretical Poisson F(r)
## cs hat(F)[cs](r) Chiu-Stoyan estimate of F(r)
## rs hat(F)[bord](r) border corrected estimate of F(r)
## km hat(F)[km](r) Kaplan-Meier estimate of F(r)
## hazard hat(h)[km](r) Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r) theoretical Poisson hazard h(r)
## .....................................................................
## Default plot formula: .~r
## where "." stands for 'km', 'rs', 'cs', 'theo'
## Recommended range of argument r: [0, 194.37]
## Available range of argument r: [0, 239.58]
plot(ftest)

Carrying out DBSCAN to determine where the clusters are, after having established that CSR does not hold, using the tests above.
# Libraries for DBSCAN clustering and basemap tiles
library(raster)
library(fpc)
library(plyr)
library(OpenStreetMap)
Checking CRS of the bradford spatial polygon
crs(bradford)
## CRS arguments:
## +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
#first extract the points from the spatial points data frame
crime_bradford_points <- data.frame(crime_bradford@coords[,1:2])
#now run the dbscan analysis
# NOTE(review): these coordinates are lon/lat degrees (bradford is EPSG:4326),
# so eps = 0.003 is in degrees — roughly 300 m north-south at this latitude,
# somewhat less east-west. A cluster needs at least MinPts = 30 points.
db <- fpc::dbscan(crime_bradford_points, eps = 0.003, MinPts = 30)
#now plot the results
plot(db, crime_bradford_points, main = "DBSCAN Output", frame = F)
plot(bradford, add=T)

Can use kNNdistplot() from the dbscan package to find a suitable eps value based on the Ripley’s K plot. Above about 300 metres the curve spikes sharply, so an eps of 0.003 degrees (roughly 300 metres) would be suitable.
# k-nearest-neighbour distance plot (k = MinPts) to justify the eps choice
library(dbscan)
dbscan::kNNdistplot(crime_bradford_points, k = 30)

Using ggplot2 to plot a nicer map
library(ggplot2)
Calling cluster db object
# Two clusters found (labels 1 and 2); label 0 = noise points
db
## dbscan Pts=333 MinPts=30 eps=0.003
## 0 1 2
## border 214 29 23
## seed 0 6 61
## total 214 35 84
db$cluster
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [36] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0
## [71] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [141] 0 2 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 1 1
## [176] 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 2 0 2 2 2 2 2 2 0 2 2 2 2
## [211] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [246] 2 2 2 2 2 0 2 2 0 0 2 2 2 2 2 2 0 2 2 2 2 0 2 2 0 2 2 2 2 2 2 2 2 2 2
## [281] 0 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [316] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Adding this information back into crime dataframe
# Attach the cluster label of each point as a new column
crime_bradford_points$cluster <- db$cluster
Creating convex hull polygons to wrap around the points in the clusters
# For each cluster label, keep only the points on its convex hull boundary
chulls <- ddply(crime_bradford_points, .(cluster),
function(df) df[chull(df$coords.x1, df$coords.x2), ])
Can drop 0 from the dataframe, as they are not in clusters
chulls <- subset(chulls, cluster>=1)
Creating a ggplot object from clusters 1 and 2
# Build the cluster map layer by layer: points coloured by cluster, convex-hull
# polygons around clusters 1 and 2, and the two Bradford police stations in red
dbplot <- ggplot(data=crime_bradford_points,
aes(coords.x1,coords.x2, colour=cluster, fill=cluster))
#add the points in
dbplot <- dbplot + geom_point()
#now the convex hulls
#convert spatialpointsdataframe to a dataframe as ggplot cannot handle it
df_polsub_bradford <- data.frame(polsub_bradford)
dbplot <- dbplot + geom_polygon(data = chulls,
aes(coords.x1,coords.x2, group=cluster),
alpha = 0.5) + geom_point(data=df_polsub_bradford,aes(x=coords.x1,y=coords.x2), inherit.aes = FALSE, color="red")
#now plot, setting the coordinates to scale correctly and as a black and white plot
dbplot + theme_bw() + coord_equal()
Checking CRS for bradford
crs(bradford)
## CRS arguments:
## +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
Get the bbox in lat long
##First get the bbox in lat long for Bradford
latlong <- "+init=epsg:4326"
bradfordWGS <-spTransform(bradford, CRS(latlong))
bradfordWGS@bbox
## min max
## x -2.224532 -2.149895
## y 53.468712 53.489462
Convert basemap to the same CRS
# Fetch a stamen-toner basemap for the Bradford bounding box.
# NOTE(review): openmap() expects c(upperLeft), c(lowerRight); here the first
# point has the SMALLER latitude - confirm the fetched extent is correct.
basemap<-openmap(c(53.467712,-2.225532),c(53.489362,-2.149995), zoom=NULL,"stamen-toner")
# Reproject the basemap to WGS84 lat/long (epsg:4326) to match the point data.
# (The original comment said British National Grid, but 4326 is WGS84.)
basemap_bng<-openproj(basemap, projection="+init=epsg:4326")
Adding a basemap
autoplot(basemap_bng) + geom_point(data=crime_bradford_points,
aes(coords.x1,coords.x2,
colour=cluster, fill=cluster)) +
geom_polygon(data = chulls, aes(coords.x1,coords.x2, group=cluster, fill=cluster),
alpha = 0.5) + geom_point(data=df_polsub_bradford,aes(x=coords.x1,y=coords.x2), inherit.aes = FALSE, color="red")
Plotting all crime occurrences in Manchester city
tm_shape(manchester_merged_final) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(crime_spdf) +
tm_dots(col = "blue")
Calculating the area of each polygon (ward) in Manchester city
library(rgdal)
getClass("Polygon")
## Class "Polygon" [package "sp"]
##
## Slots:
##
## Name: labpt area hole ringDir coords
## Class: numeric numeric logical integer matrix
##
## Extends: "Line"
manchester_merged_final$Area_sqkm<-sapply(slot(manchester_merged_final, "polygons"), function(x) sapply(slot(x, "Polygons"), slot, "area"))
str(manchester_merged_final$Area_sqkm)
## num [1:33] 0.000319 0.000215 0.001543 0.000554 0.000518 ...
class(manchester_merged_final$Area_sqkm)
## [1] "numeric"
# Scale square degrees to approximate square km (~110km per degree, squared).
# Multiplication is vectorized in R, so no lapply() is needed - the original
# lapply() turned the numeric vector into a list, which then had to be
# unlist()ed downstream; this keeps it numeric (unlist() still works).
manchester_merged_final$Area_sqkm <- manchester_merged_final$Area_sqkm * 12100
Extracting the area of each ward data from manchester_merged_final
# Extract the attribute table of the SpatialPolygonsDataFrame as a plain
# data.frame for per-ward calculations.
citymanchester.df <- as.data.frame(manchester_merged_final)
# NOTE(review): this stores the numeric row *names* (polygon ids), not an
# area, in a column called "wardarea" - confirm this is intended / used.
citymanchester.df$wardarea <- as.numeric(rownames(citymanchester.df))
class(citymanchester.df$crimes)
## [1] "numeric"
class(citymanchester.df$Area_sqkm)
## [1] "list"
citymanchester.df$Area_sqkm <- as.numeric(as.character(unlist(citymanchester.df$Area_sqkm)))
citymanchester.df$crimedensity <- citymanchester.df$crimes / citymanchester.df$Area_sqkm
Joining crime density data to the manchester_merged_final map
Reading in the dataset for SES - the average social grade by ward was taken, by using a numeric scale of AB = 1, C1 = 2, C2 = 3, DE = 4, so lower numbers would mean a higher social grade.
# read_csv() comes from readr, which is only library()ed much later in the
# script - load it here so this chunk runs standalone.
library(readr)
# Crime density per ward (crimes / Area_sqkm), exported earlier and re-read.
crimedensity <- read_csv("crimedensity.csv")
summary(crimedensity)
## geography crimedensity
## Length:32 Min. : 17.40
## Class :character 1st Qu.: 28.20
## Mode :character Median : 39.30
## Mean : 51.33
## 3rd Qu.: 53.67
## Max. :369.80
Joining crime density dataset with the Manchester basemap
manchester_final <- merge(manchester_merged_final, crimedensity, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)
Plotting map to check if third join has succeeded
tmap_mode("view")
tm_shape(manchester_final) +
tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
tm_dots(col = "blue")
Adding number of police stations to Manchester_final
#reading in data for police stations per ward
library(readr)
policestns <- read_csv("policestns.csv")
manchester_final <- merge(manchester_final, policestns, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)
Some clustering in Manchester itself based on the KDE diagrams above, and some clustering in Bradford ward (to be analysed later). Before being able to calculate Moran’s I and any similar statistics, we first need to define a spatial weights matrix.
library(spdep)
Calculating the centroids of all wards in Manchester city
coordsWards <- coordinates(manchester_final)
plot(coordsWards)

Generating a spatial weights matrix, using contiguity edges corners.
# Create a neighbours list using queen's contiguity (wards sharing any
# boundary point, edge or corner, count as neighbours)
MWard_nb <- poly2nb(manchester_final, queen=T) #this is a neighbours list of queens contiguity
# Alternative definition: k nearest neighbours (k = 4) from ward centroids
knn_wards <- knearneigh(coordsWards, k=4) #THIS IS A NEIGHBOURS LIST FOR NEAREST NEIGHBOURS CONTIGUITY
MWard_knn <- knn2nb(knn_wards)
#plot neighbours list of queens contiguity
# Base graphics are layered with add=TRUE, not "+" - the original
# plot(...) + plot(...) is why "## integer(0)" was printed.
# coordsWards is already a coordinate matrix, so coordinates() is redundant.
plot(MWard_nb, coordsWards, col="red")
plot(manchester_final, add=T)

## integer(0)
#plot neighbours list of nearest neighbours contiguity
# Layer the ward outlines onto the neighbour plot with add=TRUE; do not
# combine base plots with "+" (that produced the stray "## integer(0)").
plot(MWard_knn, coordsWards, col="red")
plot(manchester_final, add=T)

## integer(0)
Create spatial weights object from weights
Mward.queens_weight <- nb2listw(MWard_nb, style="W")
head(Mward.queens_weight)
## $style
## [1] "W"
##
## $neighbours
## Neighbour list object:
## Number of regions: 33
## Number of nonzero links: 138
## Percentage nonzero weights: 12.67218
## Average number of links: 4.181818
##
## $weights
## $weights[[1]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[2]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## $weights[[3]]
## [1] 0.5 0.5
##
## $weights[[4]]
## [1] 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571
##
## $weights[[5]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[6]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[7]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[8]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## $weights[[9]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[10]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[11]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[12]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[13]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[14]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[15]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[16]]
## [1] 0.5 0.5
##
## $weights[[17]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## $weights[[18]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[19]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[20]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[21]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[22]]
## [1] 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571
##
## $weights[[23]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[24]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[25]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[26]]
## [1] 0.5 0.5
##
## $weights[[27]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[28]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## $weights[[29]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[30]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## $weights[[31]]
## [1] 0.25 0.25 0.25 0.25
##
## $weights[[32]]
## [1] 0.3333333 0.3333333 0.3333333
##
## $weights[[33]]
## [1] 1
##
## attr(,"mode")
## [1] "binary"
## attr(,"W")
## [1] TRUE
## attr(,"comp")
## attr(,"comp")$d
## [1] 4 6 2 7 4 5 3 6 4 4 3 3 4 5 4 2 6 5 4 4 5 7 4 3 5 2 3 6 5 5 4 3 1
Mward.nn_weight <- nb2listw(MWard_knn, style="W")
Now we have defined our Wij matrix, we can calculate the Moran’s I and other associated statistics.
Moran’s I test tells us whether we have clustered values (close to 1) or dispersed values (close to -1), we will calculate for the densities rather than raw values.
I_MWard_Global_Density <- moran.test(manchester_final@data$crimedensity, Mward.queens_weight)
I_MWard_Global_Density
##
## Moran I test under randomisation
##
## data: manchester_final@data$crimedensity
## weights: Mward.queens_weight
##
## Moran I statistic standard deviate = 2.4325, p-value = 0.007498
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.089142919 -0.031250000 0.002449644
Geary’s C test: insufficient evidence to reject the null hypothesis of no spatial autocorrelation, i.e. no significant evidence that similar crime density values cluster together
C_MWard_Global_Density <- geary.test(manchester_final@data$crimedensity, Mward.queens_weight)
C_MWard_Global_Density
##
## Geary C test under randomisation
##
## data: manchester_final@data$crimedensity
## weights: Mward.queens_weight
##
## Geary C statistic standard deviate = 0.24572, p-value = 0.4029
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic Expectation Variance
## 0.95945997 1.00000000 0.02721973
Calculate local versions of Moran’s I statistic for each ward to see in which wards the hot-spots are
#use the localmoran function to generate I for each ward in the city
I_MWard_Local <- localmoran(manchester_final@data$crimes, Mward.queens_weight)
I_MWard_Local_Density <- localmoran(manchester_final@data$crimedensity, Mward.queens_weight)
#what does the output (the localMoran object) look like?
head(I_MWard_Local_Density)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 0 0.05492635 -0.03125 0.06619785 0.33493932 0.3688354
## 1 0.05140974 -0.03125 0.05039111 0.36822824 0.3563515
## 2 0.21394309 -0.03125 0.11361807 0.72741881 0.2334847
## 47 0.06948539 -0.03125 0.04587490 0.47032117 0.3190628
## 48 0.18555654 -0.03125 0.06619785 0.84265625 0.1997104
## 52 -0.01997972 -0.03125 0.05671380 0.04732498 0.4811271
Copying the I score and p-value back into the manchester_final SpatialPolygonsDataFrame
manchester_final@data$BLocI <- I_MWard_Local[,1]
manchester_final@data$BLocIz <- I_MWard_Local[,4]
manchester_final@data$BLocIR <- I_MWard_Local_Density[,1]
manchester_final@data$BLocIRz <- I_MWard_Local_Density[,4]
Set breaks manually
breaks1<-c(-1000,-2.58,-1.96,-1.65,1.65,1.96,2.58,1000)
Setting colour palette so higher values are redder
MoranColours<- rev(brewer.pal(8, "RdGy"))
Plotting the map
# Choropleth of local Moran's I z-scores, classed by the significance
# breaks defined above (red = significant clustering of high values).
tm_shape(manchester_final) +
tm_polygons("BLocIRz",
style="fixed",
breaks=breaks1,
palette=MoranColours,
midpoint=NA,
title="Local Moran's I, Crime Occurrences in Manchester by ward")
Converting manchester_final to a dataframe
manchester_df <- as.data.frame(manchester_final)
#define regression equation so no need to retype each time
reg=crimes~popdens+socialgrade+deprivation+polstns
social grade variable’s frequency distribution seems to not be normal - negatively skewed (more higher values translating to LOWER social grade). Check the result of a range of transformations along Tukey’s ladder.
library(car)
symbox(~`socialgrade`, manchester_df, na.rm=T, powers=seq(-3,3,by=.5))
deprivation variable’s frequency distribution seems to not be normal - positively skewed (more higher values translating to GREATER deprivation). Check the result of a range of transformations along Tukey’s ladder.
library(car)
symbox(~`deprivation`, manchester_df, na.rm=T, powers=seq(-3,3,by=.5))
Trying deprivation^2 for a more normal distribution
ggplot(manchester_df, aes(x=(`deprivation`)^2)) + geom_histogram()
Plotting it out on a scatterplot
qplot(x = (`deprivation`)^2, y = `crimes`, data=manchester_df)
qplot(x = (deprivation), y = crimes, data=manchester_df)
Fitting an OLS model with log1p(crimes) as the dependent variable and popdens, deprivation and polstns as predictors
m <- lm(log1p(crimes) ~ popdens + deprivation + polstns, data=manchester_df)
summary(m)
##
## Call:
## lm(formula = log1p(crimes) ~ popdens + deprivation + polstns,
## data = manchester_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4695 -0.3300 -0.1003 0.1369 2.5334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.552933 0.623038 7.308 4.76e-08 ***
## popdens -0.003269 0.004834 -0.676 0.5043
## deprivation 0.911520 0.494899 1.842 0.0757 .
## polstns 0.042547 0.263064 0.162 0.8726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5787 on 29 degrees of freedom
## Multiple R-squared: 0.1437, Adjusted R-squared: 0.05512
## F-statistic: 1.622 on 3 and 29 DF, p-value: 0.2057
Check if residuals are normally distributed
#save the residuals into your dataframe
manchester_df$m_resids <- m$residuals
qplot(m$residuals) + geom_histogram()
Check for multicollinearity between explanatory variables
library(corrplot)
#pull out the columns we want to check for multicolinearity
tempdf <- manchester_df[,c("popdens","deprivation")]
# Label the columns for the plot - the original labels ("Med House Price",
# "Un Auth Absence") were leftovers from a different dataset and mislabeled
# the correlation plot; these are population density and deprivation.
names(tempdf) <- c("Pop Density", "Deprivation")
#compute the correlation matrix for the two variables of interest
cormat <- cor(tempdf[,1:2], use="complete.obs", method="pearson")
#visualise the correlation matrix
corrplot(cormat)
Checking VIF to confirm no multicollinearity.
vif(m)
## popdens deprivation polstns
## 1.113247 1.069619 1.139611
Check if any spatial dependence in the residuals, first using queens neighbours
lm.morantest(m,Mward.queens_weight)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.queens_weight
##
## Moran I statistic standard deviate = 3.7725, p-value = 8.08e-05
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.36845178 -0.05835548 0.01279961
Then using nn
lm.morantest(m,Mward.nn_weight)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## Moran I statistic standard deviate = 3.4091, p-value = 0.0003259
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.269773641 -0.062950514 0.009525663
Another way to check if necessary to run a spatial model based on the OLS is to do lagrange multiplier tests.
lm.LMtests(m,Mward.nn_weight,test=c("LMerr", "LMlag", "RLMerr", "RLMlag", "SARMA"))
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## LMerr = 5.4191, df = 1, p-value = 0.01992
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## LMlag = 8.307, df = 1, p-value = 0.003949
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## RLMerr = 2.1493, df = 1, p-value = 0.1426
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## RLMlag = 5.0371, df = 1, p-value = 0.02481
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
##
## SARMA = 10.456, df = 2, p-value = 0.005364
Start with a spatially lagged x model. Spatially lagged x model – y=Xß+WXT+e.
library(spatialreg)
reg2=lmSLX(m,data=manchester_df, Mward.queens_weight)
summary(reg2)
##
## Call:
## lm(formula = formula(paste("y ~ ", paste(colnames(x)[-1], collapse = "+"))),
## data = as.data.frame(x), weights = weights)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98704 -0.25621 -0.07405 0.15080 1.78257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6518400 0.8902668 2.979 0.0062 **
## popdens -0.0003125 0.0051387 -0.061 0.9520
## deprivation -0.0568893 0.6200070 -0.092 0.9276
## polstns 0.3230968 0.2459157 1.314 0.2004
## lag.popdens 0.0061168 0.0078331 0.781 0.4419
## lag.deprivation 2.1330028 0.9363487 2.278 0.0312 *
## lag.polstns 0.9902573 0.5543479 1.786 0.0857 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5082 on 26 degrees of freedom
## Multiple R-squared: 0.4079, Adjusted R-squared: 0.2713
## F-statistic: 2.985 on 6 and 26 DF, p-value: 0.02357
Running the SLM with a queen’s case weights matrix. Spatial Lag (Autoregressive) Model - y=pWy+XB+e
reg3=lagsarlm(m,data=manchester_df, Mward.nn_weight)
summary(reg3)
##
## Call:lagsarlm(formula = m, data = manchester_df, listw = Mward.nn_weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.662125 -0.215728 -0.097012 0.121927 2.129947
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8755373 0.9061059 2.0699 0.03846
## popdens -0.0027521 0.0038313 -0.7183 0.47256
## deprivation 0.3381174 0.3945617 0.8569 0.39148
## polstns 0.0265561 0.2069473 0.1283 0.89789
##
## Rho: 0.6108, LR test value: 8.4388, p-value: 0.0036729
## Asymptotic standard error: 0.14954
## z-value: 4.0844, p-value: 4.4195e-05
## Wald statistic: 16.682, p-value: 4.4195e-05
##
## Log likelihood: -22.42409 for lag model
## ML residual variance (sigma squared): 0.20723, (sigma: 0.45523)
## Number of observations: 33
## Number of parameters estimated: 6
## AIC: 56.848, (AIC for lm: 63.287)
## LM test for residual autocorrelation
## test value: 2.1553, p-value: 0.14208
Check overall impacts, as infinite feedback loops so p-values for individual variables are meaningless
# Impacts must be computed with the SAME weights matrix used to fit the
# model - reg3 was fitted with Mward.nn_weight (not the queen's weights),
# so use it here too.
impacts(reg3,listw=Mward.nn_weight)
## Impact measures (lag, exact):
## Direct Indirect Total
## popdens -0.00314703 -0.003924004 -0.007071035
## deprivation 0.38664119 0.482099500 0.868740695
## polstns 0.03036718 0.037864571 0.068231752
# Simulated inference on the impacts; as above, the listw must match the
# weights matrix used to fit reg3 (Mward.nn_weight).
summary(impacts(reg3,listw=Mward.nn_weight,R=1000),zstats=TRUE) #Add zstats,pvals
## Impact measures (lag, exact):
## Direct Indirect Total
## popdens -0.00314703 -0.003924004 -0.007071035
## deprivation 0.38664119 0.482099500 0.868740695
## polstns 0.03036718 0.037864571 0.068231752
## ========================================================
## Simulation results (asymptotic variance matrix):
## Direct:
##
## Iterations = 1:997
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 997
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## popdens -0.003306 0.004709 0.0001491 0.0001491
## deprivation 0.380780 0.474955 0.0150420 0.0150420
## polstns 0.025214 0.254503 0.0080602 0.0080602
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## popdens -0.01209 -0.006401 -0.003262 -0.000275 0.00568
## deprivation -0.51158 0.071980 0.372939 0.704441 1.30224
## polstns -0.46446 -0.138039 0.019546 0.182830 0.53823
##
## ========================================================
## Indirect:
##
## Iterations = 1:997
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 997
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## popdens -0.005456 0.02434 0.0007708 0.0008682
## deprivation 0.605157 1.66843 0.0528398 0.0589077
## polstns 0.098095 0.78620 0.0248993 0.0263214
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## popdens -0.02574 -0.007902 -0.003559 -0.0002303 0.01085
## deprivation -1.00293 0.091026 0.402778 0.8285394 3.13574
## polstns -0.82812 -0.159029 0.023264 0.2165952 1.48515
##
## ========================================================
## Total:
##
## Iterations = 1:997
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 997
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## popdens -0.008762 0.02723 0.0008622 0.000954
## deprivation 0.985938 1.98355 0.0628195 0.068588
## polstns 0.123308 0.98003 0.0310378 0.033341
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## popdens -0.03684 -0.01506 -0.007513 -0.000591 0.0155
## deprivation -1.47234 0.19378 0.849485 1.529969 4.2874
## polstns -1.26404 -0.30803 0.048123 0.420586 1.9767
##
## ========================================================
## Simulated standard errors
## Direct Indirect Total
## popdens 0.004709365 0.0243376 0.02722546
## deprivation 0.474954507 1.6684331 1.98354556
## polstns 0.254502572 0.7862019 0.98002871
##
## Simulated z-values:
## Direct Indirect Total
## popdens -0.70195683 -0.2241828 -0.3218253
## deprivation 0.80171963 0.3627100 0.4970582
## polstns 0.09907033 0.1247704 0.1258212
##
## Simulated p-values:
## Direct Indirect Total
## popdens 0.48271 0.82262 0.74759
## deprivation 0.42272 0.71682 0.61915
## polstns 0.92108 0.90071 0.89987
Next do a SEM
reg4=errorsarlm(m,data=manchester_df, Mward.queens_weight)
summary(reg4)
##
## Call:
## errorsarlm(formula = m, data = manchester_df, listw = Mward.queens_weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.538925 -0.245918 -0.052433 0.128232 2.009450
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.3364619 0.6052271 8.8173 <2e-16
## popdens -0.0045779 0.0042227 -1.0841 0.2783
## deprivation 0.2379737 0.4909464 0.4847 0.6279
## polstns -0.1408868 0.1842669 -0.7646 0.4445
##
## Lambda: 0.61019, LR test value: 9.9545, p-value: 0.0016046
## Asymptotic standard error: 0.14506
## z-value: 4.2064, p-value: 2.5947e-05
## Wald statistic: 17.694, p-value: 2.5947e-05
##
## Log likelihood: -21.66624 for error model
## ML residual variance (sigma squared): 0.19362, (sigma: 0.44002)
## Number of observations: 33
## Number of parameters estimated: 6
## AIC: 55.332, (AIC for lm: 63.287)
Trying GWR
library(spgwr)
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(log1p(crimes) ~ popdens + deprivation + polstns, data=manchester_df, coords=coordsWards,adapt=T)
## Adaptive q: 0.381966 CV score: 11.40972
## Adaptive q: 0.618034 CV score: 11.6588
## Adaptive q: 0.236068 CV score: 11.47475
## Adaptive q: 0.3657329 CV score: 11.41114
## Adaptive q: 0.3896803 CV score: 11.41104
## Adaptive q: 0.3779032 CV score: 11.4095
## Adaptive q: 0.3771599 CV score: 11.4095
## Adaptive q: 0.3773912 CV score: 11.4095
## Adaptive q: 0.3774319 CV score: 11.4095
## Adaptive q: 0.3773505 CV score: 11.4095
## Adaptive q: 0.3773912 CV score: 11.4095
#run the gwr model
gwr.model = gwr(m, coords=coordsWards, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)
#print the results of the model
gwr.model
## Call:
## gwr(formula = m, coords = coordsWards, adapt = GWRbandwidth,
## hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.3773912 (about 12 of 33 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max.
## X.Intercept. 4.34333309 4.45449801 4.76115937 5.19473665 5.60632329
## popdens -0.00734385 -0.00589415 -0.00432275 -0.00226600 -0.00073926
## deprivation 0.31889085 0.52964949 0.79842305 0.88945093 0.99184834
## polstns -0.01367024 0.01095601 0.02308837 0.03409300 0.04536910
## Global
## X.Intercept. 4.5529
## popdens -0.0033
## deprivation 0.9115
## polstns 0.0425
## Number of data points: 33
## Effective number of parameters (residual: 2traceS - traceS'S): 8.1081
## Effective degrees of freedom (residual: 2traceS - traceS'S): 24.8919
## Sigma (residual: 2traceS - traceS'S): 0.552994
## Effective number of parameters (model: traceS): 6.571103
## Effective degrees of freedom (model: traceS): 26.4289
## Sigma (model: traceS): 0.5366732
## Sigma (ML): 0.4802777
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 65.70114
## AIC (GWR p. 96, eq. 4.22): 51.81726
## Residual sum of squares: 7.612001
## Quasi-global R2: 0.3288733
Collect results
results<-as.data.frame(gwr.model$SDF)
names(results)
## [1] "sum.w" "X.Intercept." "popdens"
## [4] "deprivation" "polstns" "X.Intercept._se"
## [7] "popdens_se" "deprivation_se" "polstns_se"
## [10] "gwr.e" "pred" "pred.se"
## [13] "localR2" "X.Intercept._se_EDF" "popdens_se_EDF"
## [16] "deprivation_se_EDF" "polstns_se_EDF" "pred.se.1"
## [19] "coord.x" "coord.y"
Attach results to original dataframe
#attach coefficients to original dataframe
# NOTE(review): this relies on gwr.model$SDF rows being in the same order
# as manchester_final's polygons - confirm no reordering happened upstream.
manchester_final@data$coefpopdens<-results$popdens
manchester_final@data$coefdeprivation<-results$deprivation
manchester_final@data$coefpolstns<-results$polstns
Plotting GWR result for popdens
tm_shape(manchester_final) +
tm_polygons(col = "coefpopdens", palette = "RdBu", alpha = 0.5)
Plotting GWR results for deprivation
tm_shape(manchester_final) +
tm_polygons(col = "coefdeprivation", palette = "RdBu", alpha = 0.5)
Plotting GWR results for police stations
tm_shape(manchester_final) +
tm_polygons(col = "coefpolstns", palette = "RdBu", alpha = 0.5)
Calculating standard errors for statistical significance for population density
#run the significance test
#sigTest = abs(gwr.model$SDF$"log(`Median House Price (£) - 2014`)") -2 * gwr.model$SDF$"log(`Median House Price (£) - 2014`)_se"
# Rough significance screen: |local coefficient| - 2*SE > 0 suggests the
# coefficient differs from zero at roughly the 95% level in that ward.
sigTest = abs(gwr.model$SDF$"popdens") -2 * gwr.model$SDF$"popdens_se"
#store significance results
manchester_final$GWRpopdens<-sigTest
Plotting significance for population density on the map
tm_shape(manchester_final) +
tm_polygons(col = "GWRpopdens", palette = "RdYlBu")
Plotting significance for deprivation on the map
#run the significance test
sigTest = abs(gwr.model$SDF$"deprivation") -2 * gwr.model$SDF$"deprivation_se"
#store significance results
manchester_final$GWRdeprivation<-sigTest
#plotting
tm_shape(manchester_final) +
tm_polygons(col = "GWRdeprivation", palette = "RdYlBu")
sigTest
## [1] -0.214446417 -0.048627036 -0.042102614 -0.506553445 -0.096120189
## [6] -0.237855939 -0.518978569 -0.182680631 -0.779023348 -0.409469910
## [11] -0.128028815 -0.002661584 -0.501554521 -0.605923711 -0.152284373
## [16] -0.187226045 -0.243262200 -0.447880588 -0.513664133 -0.058096884
## [21] -0.135267178 -0.266873828 -0.210703630 -0.109190368 -0.715541184
## [26] -0.370186631 -0.300053543 -0.053356652 -0.228826888 -0.188139143
## [31] -0.674957244 -0.101914571 -0.081830056
Plotting significance for police stations on the map
#run the significance test
sigTest = abs(gwr.model$SDF$"polstns") -2 * gwr.model$SDF$"polstns_se"
#store significance results
manchester_final$GWRpolstns<-sigTest
#plotting
tm_shape(manchester_final) +
tm_polygons(col = "GWRpolstns", palette = "RdYlBu")